Read the data from the kaggle website https://www.kaggle.com/karangadiya/fifa19.
(load("fifa19small.rda"))
## [1] "fifa19small"
rownames(fifa19small) <- fifa19small$Name
fifa19small["R. Lewandowski",]
## Name Club Position Value.EUR Age
## R. Lewandowski R. Lewandowski FC Bayern München ST 7.7e+07 29
## Overall Special Preferred.Foot International.Reputation
## R. Lewandowski 90 2152 Right 4
## Weak.Foot Skill.Moves Crossing Finishing HeadingAccuracy
## R. Lewandowski 4 4 62 91 85
## ShortPassing Volleys Dribbling Curve FKAccuracy LongPassing
## R. Lewandowski 83 89 85 77 86 65
## BallControl Acceleration SprintSpeed Agility Reactions
## R. Lewandowski 89 77 78 78 90
## Balance ShotPower Jumping Stamina Strength LongShots
## R. Lewandowski 78 88 84 78 84 84
## Aggression Interceptions Positioning Vision Penalties
## R. Lewandowski 80 39 91 77 88
## Composure Marking StandingTackle SlidingTackle GKDiving
## R. Lewandowski 86 34 42 19 15
## GKHandling GKKicking GKPositioning GKReflexes
## R. Lewandowski 6 12 8 10
Value is skewed. Will be much easier to model sqrt(Value).
fifa19small$SqrtValue <- sqrt(fifa19small$Value.EUR)
fifa19small <- fifa19small[,-c(1, 2, 3, 4, 6)]
Value is skewed. Will be much easier to model sqrt(Value).
library("ggplot2")
library("DALEX")
## Welcome to DALEX (version: 0.4.9).
## Find examples and detailed introduction at: https://pbiecek.github.io/PM_VEE/
ggplot(fifa19small, aes(Age, SqrtValue)) +
geom_point() + geom_smooth(se = FALSE) +
theme_drwhy()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
ggplot(fifa19small, aes(Age)) +
geom_histogram() +
theme_drwhy()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(fifa19small, aes(Reactions, SqrtValue)) +
geom_point() + geom_smooth(se = FALSE) +
theme_drwhy()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
ggplot(fifa19small, aes(Reactions)) +
geom_histogram() +
theme_drwhy()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(fifa19small, aes(BallControl, SqrtValue)) +
geom_point() + geom_smooth(se = FALSE) +
theme_drwhy()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
ggplot(fifa19small, aes(BallControl)) +
geom_histogram() +
theme_drwhy()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(fifa19small, aes(ShortPassing, SqrtValue)) +
geom_point() + geom_smooth(se = FALSE) +
theme_drwhy()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
ggplot(fifa19small, aes(ShortPassing)) +
geom_histogram() +
theme_drwhy()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(fifa19small, aes(Dribbling, SqrtValue)) +
geom_point() + geom_smooth(se = FALSE) +
theme_drwhy()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
ggplot(fifa19small, aes(Dribbling)) +
geom_histogram() +
theme_drwhy()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Let’s create following models:
gbm model with 250 trees 4 levels depth,gbm model with 250 trees 1 level depth,randomForest model with 250 trees.library("gbm")
## Loaded gbm 2.1.5
fifa_gbm_deep <- gbm(SqrtValue~., data = fifa19small, n.trees = 250, interaction.depth = 4)
## Distribution not specified, assuming gaussian ...
fifa_gbm_shallow <- gbm(SqrtValue~., data = fifa19small, n.trees = 250, interaction.depth = 1)
## Distribution not specified, assuming gaussian ...
library("ranger")
fifa_rf <- ranger(SqrtValue~., data = fifa19small, num.trees = 250)
library("rms")
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
fifa_ols <- ols(SqrtValue ~ rcs(Age) + rcs(Special) + rcs(International.Reputation) + rcs(Skill.Moves) + rcs(Crossing) + rcs(Finishing) + rcs(HeadingAccuracy) + rcs(ShortPassing) + rcs(Volleys) + rcs(Dribbling) + rcs(Curve) + rcs(FKAccuracy) + rcs(LongPassing) + rcs(BallControl) + rcs(Acceleration) + rcs(SprintSpeed) + rcs(Agility) + rcs(Reactions) + rcs(Balance) + rcs(ShotPower) + rcs(Jumping) + rcs(Stamina) + rcs(Strength) + rcs(LongShots) + rcs(Aggression) + rcs(Interceptions) + rcs(Positioning) + rcs(Vision) + rcs(Penalties) + rcs(Composure) + rcs(Marking) + rcs(StandingTackle) + rcs(SlidingTackle) + rcs(GKDiving) + rcs(GKHandling) + rcs(GKKicking) + rcs(GKPositioning) + rcs(GKReflexes), data = fifa19small)
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied
## = fractied): 5 knots requested with 5 unique values of x. knots set to 3
## interior values.
## Warning in rcspline.eval(x, nk = nknots, inclx = TRUE, pc = pc, fractied
## = fractied): 5 knots requested with 5 unique values of x. knots set to 3
## interior values.
library("DALEX")
fifa_gbm_exp_deep <- explain(fifa_gbm_deep,
data = fifa19small,
y = fifa19small$SqrtValue^2,
predict_function = function(m,x)
predict(m, x, n.trees = 250)^2,
label = "GBM deep")
## Preparation of a new explainer is initiated
## -> model label : GBM deep
## -> data : 16924 rows 41 cols
## -> target variable : 16924 values
## -> predict function : function(m, x) predict(m, x, n.trees = 250)^2
## -> predicted values : numerical, min = 2.034728 , mean = 2484612 , max = 108062726
## -> residual function : difference between y and yhat ( [33m default [39m )
## -> residuals : numerical, min = -12833791 , mean = 49865.43 , max = 16361034
## -> model_info : package gbm , ver. 2.1.5 , task regression ( [33m default [39m )
## [32m A new explainer has been created! [39m
fifa_gbm_exp_shallow <- explain(fifa_gbm_shallow,
data = fifa19small,
y = fifa19small$SqrtValue^2,
predict_function = function(m,x)
predict(m, x, n.trees = 250)^2,
label = "GBM shallow")
## Preparation of a new explainer is initiated
## -> model label : GBM shallow
## -> data : 16924 rows 41 cols
## -> target variable : 16924 values
## -> predict function : function(m, x) predict(m, x, n.trees = 250)^2
## -> predicted values : numerical, min = 0.6418461 , mean = 2392380 , max = 88928951
## -> residual function : difference between y and yhat ( [33m default [39m )
## -> residuals : numerical, min = -38611620 , mean = 142097.5 , max = 33732602
## -> model_info : package gbm , ver. 2.1.5 , task regression ( [33m default [39m )
## [32m A new explainer has been created! [39m
fifa_rf_exp <- explain(fifa_rf,
data = fifa19small,
y = fifa19small$SqrtValue^2,
predict_function = function(m,x)
predict(m, x)$predictions^2,
label = "RF")
## Preparation of a new explainer is initiated
## -> model label : RF
## -> data : 16924 rows 41 cols
## -> target variable : 16924 values
## -> predict function : function(m, x) predict(m, x)$predictions^2
## -> predicted values : numerical, min = 5784.248 , mean = 2433673 , max = 95793175
## -> residual function : difference between y and yhat ( [33m default [39m )
## -> residuals : numerical, min = -6039446 , mean = 100804.5 , max = 23714519
## -> model_info : package ranger , ver. 0.11.2 , task regression ( [33m default [39m )
## [32m A new explainer has been created! [39m
fifa_rms_exp <- explain(fifa_ols,
data = fifa19small,
y = fifa19small$SqrtValue^2,
predict_function = function(m,x)
predict(m, x)^2,
label = "RMS")
## Preparation of a new explainer is initiated
## -> model label : RMS
## -> data : 16924 rows 41 cols
## -> target variable : 16924 values
## -> predict function : function(m, x) predict(m, x)^2
## -> predicted values : numerical, min = 0.009600715 , mean = 2452489 , max = 103082633
## -> residual function : difference between y and yhat ( [33m default [39m )
## -> residuals : numerical, min = -25691692 , mean = 81988.25 , max = 43440034
## -> model_info : package stats , ver. 3.6.1 , task regression ( [33m default [39m )
## [32m A new explainer has been created! [39m
library("auditor")
##
## Attaching package: 'auditor'
## The following object is masked from 'package:DALEX':
##
## model_performance
fifa_mr_gbm_shallow <- model_residual(fifa_gbm_exp_shallow)
fifa_mr_gbm_deep <- model_residual(fifa_gbm_exp_deep)
fifa_mr_gbm_rf <- model_residual(fifa_rf_exp)
fifa_mr_gbm_rms <- model_residual(fifa_rms_exp)
plot_residual_boxplot(fifa_mr_gbm_shallow, fifa_mr_gbm_deep, fifa_mr_gbm_rf, fifa_mr_gbm_rms) +
scale_y_sqrt()
plot_prediction(fifa_mr_gbm_shallow, abline = TRUE) +
scale_y_sqrt() + scale_x_sqrt()
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
plot_prediction(fifa_mr_gbm_deep, abline = TRUE) +
scale_y_sqrt() + scale_x_sqrt()
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
plot_prediction(fifa_mr_gbm_rf, abline = TRUE) +
scale_y_sqrt() + scale_x_sqrt()
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
plot_prediction(fifa_mr_gbm_rms, abline = TRUE) +
scale_y_sqrt() + scale_x_sqrt()
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
library("ingredients")
##
## Attaching package: 'ingredients'
## The following object is masked from 'package:auditor':
##
## plotD3
## The following object is masked from 'package:Hmisc':
##
## describe
fifa_feat <- ingredients::feature_importance(fifa_gbm_exp_shallow)
plot(fifa_feat, max_vars = 12)
## Warning: Please note that 'theme_drwhy_colors()' is now deprecated, it is
## better to use 'colors_discrete_drwhy()' instead.
fifa_feat <- ingredients::feature_importance(fifa_gbm_exp_deep)
plot(fifa_feat, max_vars = 12)
fifa_feat <- ingredients::feature_importance(fifa_rf_exp)
plot(fifa_feat, max_vars = 12)
fifa_feat <- ingredients::feature_importance(fifa_rms_exp)
plot(fifa_feat, max_vars = 12)
fifa19_pd_shallow <- ingredients::partial_dependency(fifa_gbm_exp_shallow, variables = c("Age", "Reactions","BallControl", "Dribbling"))
fifa19_pd_deep <- ingredients::partial_dependency(fifa_gbm_exp_deep, variables = c("Age", "Reactions","BallControl", "Dribbling"))
fifa19_pd_rf <- ingredients::partial_dependency(fifa_rf_exp, variables = c("Age", "Reactions","BallControl", "Dribbling"))
fifa19_pd_rms <- ingredients::partial_dependency(fifa_rms_exp, variables = c("Age", "Reactions","BallControl", "Dribbling"))
plot(fifa19_pd_shallow, fifa19_pd_deep, fifa19_pd_rf, fifa19_pd_rms) +
scale_y_log10()
library("iBreakDown")
##
## Attaching package: 'iBreakDown'
## The following objects are masked from 'package:ingredients':
##
## describe, plotD3
## The following object is masked from 'package:auditor':
##
## plotD3
## The following object is masked from 'package:Hmisc':
##
## describe
fifa_pg <- break_down(fifa_gbm_exp_shallow, new_observation = fifa19small["R. Lewandowski",])
plot(fifa_pg)
fifa_pg <- break_down(fifa_gbm_exp_deep, new_observation = fifa19small["R. Lewandowski",])
plot(fifa_pg)
fifa_pg <- break_down(fifa_rf_exp, new_observation = fifa19small["R. Lewandowski",])
plot(fifa_pg)
fifa_pg <- break_down(fifa_rms_exp, new_observation = fifa19small["R. Lewandowski",])
plot(fifa_pg)
library("iBreakDown")
fifa_pg <- break_down(fifa_gbm_exp_shallow, new_observation = fifa19small["R. Lewandowski",], interactions = TRUE)
plot(fifa_pg)
fifa_pg <- break_down(fifa_gbm_exp_deep, new_observation = fifa19small["R. Lewandowski",], interactions = TRUE)
plot(fifa_pg)
fifa_pg <- break_down(fifa_rf_exp, new_observation = fifa19small["R. Lewandowski",], interactions = TRUE)
plot(fifa_pg)
fifa_pg <- break_down(fifa_rms_exp, new_observation = fifa19small["R. Lewandowski",], interactions = TRUE)
plot(fifa_pg)
fifa_cp_shallow <- ceteris_paribus(fifa_gbm_exp_shallow,
new_observation = fifa19small["R. Lewandowski",], variables = c("Age", "Reactions","BallControl", "Dribbling"),
variable_splits = list(Age = seq(15,45,0.1), Reactions = seq(20,100,0.1), BallControl = seq(20,100,0.1), Dribbling = seq(20,100,0.1))
)
fifa_cp_deep <- ceteris_paribus(fifa_gbm_exp_deep,
new_observation = fifa19small["R. Lewandowski",], variables = c("Age", "Reactions","BallControl", "Dribbling"),
variable_splits = list(Age = seq(15,45,0.1), Reactions = seq(20,100,0.1), BallControl = seq(20,100,0.1), Dribbling = seq(20,100,0.1))
)
fifa_cp_rf <- ceteris_paribus(fifa_rf_exp,
new_observation = fifa19small["R. Lewandowski",], variables = c("Age", "Reactions","BallControl", "Dribbling"),
variable_splits = list(Age = seq(15,45,0.1), Reactions = seq(20,100,0.1), BallControl = seq(20,100,0.1), Dribbling = seq(20,100,0.1))
)
fifa_cp_rms <- ceteris_paribus(fifa_rms_exp,
new_observation = fifa19small["R. Lewandowski",], variables = c("Age", "Reactions","BallControl", "Dribbling"),
variable_splits = list(Age = seq(15,45,0.1), Reactions = seq(20,100,0.1), BallControl = seq(20,100,0.1), Dribbling = seq(20,100,0.1))
)
plot(fifa_cp_shallow, fifa_cp_deep, fifa_cp_rf, fifa_cp_rms, color = "_label_") +
show_observations(fifa_cp_rf, fifa_cp_shallow, fifa_cp_deep,fifa_cp_rms, variables = c("Age", "Reactions","BallControl", "Dribbling")) +
scale_y_log10()
library(modelStudio)
fifa19_ms <- modelStudio(fifa_gbm_shallow, new_observation = fifa19small[c("Cristiano Ronaldo","R. Lewandowski"), ], B = 5, digits = 0)
op <- modelStudioOptions(
margin.left = 6
)
print(fifa19_ms, options = op)
r2d3::save_d3_html(fifa19_ms, file = "fifa19.html")